In [7]:
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn import model_selection
from sklearn import metrics
from sklearn.datasets import load_digits
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=0)
# Fit each classifier on the training split and report macro-averaged F1
for Model in [GaussianNB, KNeighborsClassifier, LinearSVC]:
    clf = Model().fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print('%s: %s' %
          (Model.__name__, metrics.f1_score(y_test, y_pred, average="macro")))
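For a closer per-class view of any one of these models, scikit-learn's classification_report summarizes precision, recall, and F1 for each digit. A minimal sketch (using KNeighborsClassifier purely as an example, not because it is necessarily the best of the three):

In [ ]:
from sklearn.metrics import classification_report
# Per-class precision, recall, and F1 on the held-out test split
clf = KNeighborsClassifier().fit(X_train, y_train)
print(classification_report(y_test, clf.predict(X_test)))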
In [8]:
from sklearn.model_selection import cross_val_score
clf = KNeighborsClassifier()
cross_val_score(clf, X, y, cv=5)
Out[8]:
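cross_val_score returns one score per fold, so it is often convenient to summarize the array. A minimal sketch (the exact numbers depend on the estimator and the data):

In [ ]:
scores = cross_val_score(clf, X, y, cv=5)
# Mean accuracy and its spread across the five folds
print("%.3f +/- %.3f" % (scores.mean(), scores.std()))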
We can use different splitting strategies, such as random splitting (there exist many different cross-validation strategies in scikit-learn; they are often useful for taking non-i.i.d. datasets into account).
In [10]:
from sklearn.model_selection import ShuffleSplit
# Random permutation splitting: 5 independent random train/test splits
cv = ShuffleSplit(n_splits=5)
cross_val_score(clf, X, y, cv=cv)
Out[10]:
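When samples are not i.i.d. because they come in groups (for example, several samples per patient or per recording session), a group-aware splitter such as GroupKFold keeps all samples of a group on the same side of each split. A sketch with purely hypothetical group labels, just to show the API:

In [ ]:
import numpy as np
from sklearn.model_selection import GroupKFold
# Hypothetical groups: pretend each run of 10 consecutive samples belongs together
groups = np.arange(len(X)) // 10
cross_val_score(clf, X, y, groups=groups, cv=GroupKFold(n_splits=5))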
In [11]:
from sklearn.datasets import load_diabetes
data = load_diabetes()
X, y = data.data, data.target
print(X.shape)
In [12]:
from sklearn.linear_model import Ridge, Lasso
# Compare two regularized linear regression models by mean cross-validated R^2
for Model in [Ridge, Lasso]:
    model = Model()
    print('%s: %s' % (Model.__name__, cross_val_score(model, X, y).mean()))
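Both models above use their default regularization strength alpha=1.0; a natural next step is to search over alpha with cross-validation. A sketch using GridSearchCV (the grid values here are arbitrary, chosen only for illustration):

In [ ]:
from sklearn.model_selection import GridSearchCV
for Model in [Ridge, Lasso]:
    # Arbitrary small grid of regularization strengths
    grid = GridSearchCV(Model(), {'alpha': [0.01, 0.1, 1.0, 10.0]}, cv=5)
    grid.fit(X, y)
    print('%s: best alpha=%s, mean CV score=%.3f'
          % (Model.__name__, grid.best_params_['alpha'], grid.best_score_))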